#=======================================================================================#
#
# Replication code for 
# "Rewarding loyalty: Selective reassurance and enforcement of asymmetric alliances"
#
#       Yasuki Kudo
#
#       Last modified: 2024-09-29
#=======================================================================================#

rm(list=ls()) 

# packages
#=======================================================================================#
library(tidyverse)
library(haven)
library(missRanger)
library(mice)
library(estimatr)
library(marginaleffects)
library(statar)
library(gridExtra)
library(modelsummary)
library(interflex)
library(foreign)

set.seed(1234)

# Duplicate df 
#=======================================================================================#
load("kudo2024jpr_data.rdata")

# h: 1 if signal includes all factors (except for alliance); 2 if troop is excluded
for(h in 1:2) {
  
  new.dat <- list()
  
  if(h == 1) { # main analysis
    
    df$signal.mean <- df$signal_nopact_mean
    df$signal.sd <- df$signal_nopact_sd
    df$L.signal.mean <- df$L1signal_nopact_mean
    df$L.signal.sd <- df$L1signal_nopact_sd
    
  }
  
  if(h == 2) { # w/o US troops
    
    df$signal.mean <- df$signal_notroops_mean
    df$signal.sd <- df$signal_notroops_sd
    df$L.signal.mean <- df$L1signal_notroops_mean
    df$L.signal.sd <- df$L1signal_notroops_sd
    
  }
  
  # ten data sets
  for(i in 1:10) {
    
    new.dat[[i]] <- df
    
  }
  
  # ten draws
  for(i in 1:10) {
    
    # reassurance
    mean <- df$signal.mean
    sd <- df$signal.sd
    
    new.dat[[i]]$signal <- rnorm(rep(1, nrow(df)), mean, sd)
    
    # reassurance at t+1
    L.mean <- df$L.signal.mean
    L.sd <- df$L.signal.sd
    
    new.dat[[i]]$L.signal <- rnorm(rep(1, nrow(df)), L.mean, L.sd)
    
  }
  
  # save
  if(h == 1) new.dat ->> new.dat.pact
  if(h == 2) new.dat ->> new.dat.troops
  
} 

rm(new.dat)

# Main analysis and Additional Analysis 1
#=======================================================================================#
#=======================================================================================#

# estimate models
#=======================================================================================#

for(h in 1:2) {
  
  if(h == 1) new.dat <- new.dat.pact
  if(h == 2) new.dat <- new.dat.troops
  
  for(i in 1:12) {
    
    # i == 1: Model 1 in Table I/Model A1 in Table AI
    # i == 2: Model 2 in Table I/Model A2 in Table AI
    # i == 3: Model 3 in Table I/Model A3 in Table AI
    # i == 4: Model 4 in Table I/Model A4 in Table AI
    # i == 5: Model A5 in Table AII (Nuc latency*unga)
    # i == 6: Model A6 in Table AII (Nuc latency*important unga)
    # i == 7: Model A7 in Table AII (Nuc possession*unga)
    # i == 8: Model A8 in Table AII (Nuc posession*important unga)
    # i == 9: Model A9 in Table AII  (Nuc latency*unga + Nuc possession*unga)
    # i == 10: Model A10 in Table AII (Nuc latency*important unga + Nuc posession*important unga)
    # i == 11: Model A11 in Table AIII (Nuc latency*unga (v-clare))
    # i == 12: Model A12 in Table AIII (Nuc latency*important unga (v-clare))
    
    if(h == 2 & i > 4) next
    
    # all UNGA Res., all time period
    if(i %in% c(1, 2, 5, 7, 9, 11)){
      
      for(j in 1:10) {
        
        new.dat[[j]]$unga <- df$unga
        new.dat[[j]]$time <- df$timeC
        
      }
    }
    
    # important UNGA Res., year >= 1983
    if(i %in% c(3, 4, 6, 8, 10, 12)){
      
      for(j in 1:10) {
        
        new.dat[[j]]$unga <- df$ungaImpt
        new.dat[[j]]$time <- df$time2C
        
      }
    }
    
    # conditional variable
    # sdp
    if(i %in% c(1:4)) {
      
      for(j in 1:10) {
        
        new.dat[[j]]$condit.var <- df$logsdp
        
      }
    }
    
    # nuc latency (F&T2015)
    if(i %in% c(5:10)) {
      
      for(j in 1:10) {
        
        new.dat[[j]]$condit.var <- as.factor(df$latency_pilot)
        
      }
    }
    
    # nuc latency (v-clare)
    if(i %in% c(11, 12)) {
      
      for(j in 1:10) {
        
        new.dat[[j]]$condit.var <- df$Nu_infrastructure
        
      }
    }
    
    control <- 
      c("USlogtrade", "RUSlogtrade", "CHNlogtrade", "USdist", "RUSdist", "CHNdist", "threats", "logpop",
        "Sscore", "polity2", "mid5", "as.factor(region)", "time", "I(time^2)", "I(time^3)")
    
    # formula for models 1 and 3 (no interaction term)
    if(i %in% c(1, 3)) {
      
      formula <- update(L.signal ~ signal + unga + condit.var, reformulate(c(".", control)))
      formula <- Reduce(paste, deparse(formula))
      
    }
    
    # formula for models 2 and 4 (interaction terms)
    if(i %in% c(2, 4)) {
      
      formula <- update(L.signal ~ signal + unga*condit.var, reformulate(c(".", control)))
      formula <- Reduce(paste, deparse(formula))
      
    }
    
    if(i %in% c(5, 6)) { # unga*latency + posession
      
      formula <- update(L.signal ~ signal + unga*condit.var + possession, reformulate(c(".", control)))
      formula <- Reduce(paste, deparse(formula))
      
    }
    
    if(i %in% c(7, 8)) { # unga*posession + latency (no figure)
      
      formula <- update(L.signal ~ signal + unga*possession + condit.var, reformulate(c(".", control)))
      formula <- Reduce(paste, deparse(formula))
      
    }
    
    if(i %in% c(9, 10)) { # unga*posession + unga*latency (no figure)
      
      formula <- update(L.signal ~ signal +  unga*condit.var + unga*possession, reformulate(c(".", control)))
      formula <- Reduce(paste, deparse(formula))
      
    }
    
    if(i %in% c(11, 12)) { # unga*latency (v-clare) + posession
      
      formula <- update(L.signal ~ signal + unga*condit.var + possession, reformulate(c(".", control)))
      formula <- Reduce(paste, deparse(formula))
      
    }
    
    fit_reg <- function(dat) {
      
      formula <- as.formula(formula)
      mod <- lm_robust(formula, data = dat, clusters = ccode, se_type = "stata")
      
    }
    
    models <- lapply(new.dat, function(x) fit_reg(dat = x))
    
    pooled_fit <- pool(models)
    
    if(i %in% c(2, 4, 5, 6, 11, 12)) {
      
      int.condit_reg <- function(dat, condit.var) {
        
        formula <- as.formula(formula)
        mod <- lm_robust(formula, data = dat, clusters = ccode, se_type = "stata")
        comp <- comparisons(mod, variables = list(unga = "2sd"), newdata = datagrid(condit.var = condit.var))
        
      }
      
      if(i %in% c(2, 11)) list.condit.var <- quantile(new.dat[[1]]$condit.var, prob = seq(0, 1, length = 15), na.rm = T)
      if(i %in% c(4, 12)) list.condit.var <- quantile(subset(new.dat[[1]], year >= 1983)$condit.var, prob = seq(0, 1, length = 15), na.rm = T)
      if(i %in% c(5, 6)) list.condit.var <- c("0", "1")
      
      for(j in 1:length(list.condit.var)) {
        
        if(j == 1) {fd.int.condit.pooled <- tibble()}
        
        sub <- lapply(new.dat, function(x) int.condit_reg(dat = x, condit.var = list.condit.var[j]))
        sub.pooled <- summary(pool(sub))
        fd.int.condit.pooled <- rbind(fd.int.condit.pooled, sub.pooled)
        
        cat("+")
        
      }
      
      fd.int.condit.pooled$condit.var <- list.condit.var
      
      if(h == 1 && i == 2) fd.int.condit.pooled ->> fd2.sdp.pooled.pact
      if(h == 2 && i == 2) fd.int.condit.pooled ->> fd2.sdp.pooled.troops
      
      if(h == 1 && i == 4) fd.int.condit.pooled ->> fd4.sdp.pooled.pact
      if(h == 2 && i == 4) fd.int.condit.pooled ->> fd4.sdp.pooled.troops
      
      if(h == 1 && i == 5) fd.int.condit.pooled ->> fd2.nuc1.pooled.pact
      if(h == 1 && i == 6) fd.int.condit.pooled ->> fd4.nuc1.pooled.pact
      
      if(h == 1 && i == 11) fd.int.condit.pooled ->> fd2.nuc2.pooled.pact
      if(h == 1 && i == 12) fd.int.condit.pooled ->> fd4.nuc2.pooled.pact
      
      # second diff.
      if(i %in% c(2, 11)) {
        
        condit.var.h <- max(new.dat[[1]]$condit.var, na.rm = T)
        condit.var.l <- min(new.dat[[1]]$condit.var, na.rm = T)
        
      }
      
      if(i %in% c(4, 12)){
        
        condit.var.h <- max(subset(new.dat[[1]], year >= 1983)$condit.var, na.rm = T) 
        condit.var.l <- min(subset(new.dat[[1]], year >= 1983)$condit.var, na.rm = T)
        
      }
      
      if(i %in% c(5, 6)) {
        
        condit.var.h <- "1"
        condit.var.l <- "0"
        
      }
      
      second.diff <- function(dat) {
        
        formula <- as.formula(formula)
        mod <- lm_robust(formula, data = dat, clusters = ccode, se_type = "stata")
        comp <- comparisons(mod, variables = list(unga = "2sd"), hypothesis = "pairwise",
                            newdata = datagrid(condit.var = c(condit.var.h, condit.var.l)))
        
      }
      
      sec.diff.pool <- lapply(new.dat, function(x) second.diff(dat = x))
      
      coef <- c(sec.diff.pool[[1]][["estimate"]], sec.diff.pool[[2]][["estimate"]], 
                sec.diff.pool[[3]][["estimate"]], sec.diff.pool[[4]][["estimate"]], 
                sec.diff.pool[[5]][["estimate"]], sec.diff.pool[[6]][["estimate"]], 
                sec.diff.pool[[7]][["estimate"]], sec.diff.pool[[8]][["estimate"]], 
                sec.diff.pool[[9]][["estimate"]], sec.diff.pool[[10]][["estimate"]])
      
      se <- c(sec.diff.pool[[1]][["std.error"]], sec.diff.pool[[2]][["std.error"]], 
              sec.diff.pool[[3]][["std.error"]], sec.diff.pool[[4]][["std.error"]], 
              sec.diff.pool[[5]][["std.error"]], sec.diff.pool[[6]][["std.error"]], 
              sec.diff.pool[[7]][["std.error"]], sec.diff.pool[[8]][["std.error"]], 
              sec.diff.pool[[9]][["std.error"]], sec.diff.pool[[10]][["std.error"]])
      
      se.between <- var(coef)
      se.within <- mean(se^2)
      coef.mean <- mean(coef)
      se.est <- sqrt(se.within + se.between*(1 + (1/10)))
      
      ciu90 <- coef.mean + 1.645*se.est
      cil90 <- coef.mean - 1.645*se.est
      
      ciu95 <- coef.mean + 1.96*se.est
      cil95 <- coef.mean - 1.96*se.est
      
      sec.dif90 <- paste0(round(coef.mean, 3), "[",round(cil90, 3), ", ", round(ciu90, 3), "]")
      sec.dif95 <- paste0(round(coef.mean, 3), "[",round(cil95, 3), ", ", round(ciu95, 3), "]")
      
      if(h == 1 && i == 2) sec.dif90 ->> sec90.m2.pact.sdp
      if(h == 2 && i == 2) sec.dif90 ->> sec90.m2.troop.sdp
      
      if(h == 1 && i == 4) sec.dif90 ->> sec90.m4.pact.sdp
      if(h == 2 && i == 4) sec.dif90 ->> sec90.m4.troop.sdp
      
      if(h == 1 && i == 5) sec.dif90 ->> sec90.m2.pact.nuc1
      if(h == 1 && i == 6) sec.dif90 ->> sec90.m4.pact.nuc1
      
      if(h == 1 && i == 11) sec.dif90 ->> sec90.m2.pact.nuc2
      if(h == 1 && i == 12) sec.dif90 ->> sec90.m4.pact.nuc2
      
      if(h == 1 && i == 2) sec.dif95 ->> sec95.m2.pact.sdp
      if(h == 2 && i == 2) sec.dif95 ->> sec95.m2.troop.sdp
      
      if(h == 1 && i == 4) sec.dif95 ->> sec95.m4.pact.sdp
      if(h == 2 && i == 4) sec.dif95 ->> sec95.m4.troop.sdp
      
      if(h == 1 && i == 5) sec.dif95 ->> sec95.m2.pact.nuc1
      if(h == 1 && i == 6) sec.dif95 ->> sec95.m4.pact.nuc1
      
      if(h == 1 && i == 11) sec.dif95 ->> sec95.m2.pact.nuc2
      if(h == 1 && i == 12) sec.dif95 ->> sec95.m4.pact.nuc2
      
    }
    
    if(h == 1 && i == 1) pooled_fit ->> pooled_nopact_fit1
    if(h == 1 && i == 2) pooled_fit ->> pooled_nopact_fit2
    if(h == 1 && i == 3) pooled_fit ->> pooled_nopact_fit3
    if(h == 1 && i == 4) pooled_fit ->> pooled_nopact_fit4
    if(h == 1 && i == 5) pooled_fit ->> pooled_nopact_fit5
    if(h == 1 && i == 6) pooled_fit ->> pooled_nopact_fit6
    if(h == 1 && i == 7) pooled_fit ->> pooled_nopact_fit7
    if(h == 1 && i == 8) pooled_fit ->> pooled_nopact_fit8
    if(h == 1 && i == 9) pooled_fit ->> pooled_nopact_fit9
    if(h == 1 && i == 10) pooled_fit ->> pooled_nopact_fit10
    if(h == 1 && i == 11) pooled_fit ->> pooled_nopact_fit11
    if(h == 1 && i == 12) pooled_fit ->> pooled_nopact_fit12
    
    if(h == 2 && i == 1) pooled_fit ->> pooled_notroops_fit1
    if(h == 2 && i == 2) pooled_fit ->> pooled_notroops_fit2
    if(h == 2 && i == 3) pooled_fit ->> pooled_notroops_fit3
    if(h == 2 && i == 4) pooled_fit ->> pooled_notroops_fit4
    
  }
}

# figures
#=======================================================================================#

for(i in 1:6) {
  
  if(i == 1) {
    
    fd.dat <- fd2.sdp.pooled.pact
    lab <- ggtitle("Model 2 (All resolutions)")
    df.new <- df
    ylim <- ylim(-1, 1)
    df.new$condit.var <- df.new$logsdp
    xlab <- xlab("SDP (log)") 
    
  }
  
  if(i == 2) {
    
    fd.dat <- fd4.sdp.pooled.pact
    lab <- ggtitle("Model 4 (Important resolutions)")
    df.new <- subset(df, year >= 1983)
    ylim <- ylim(-1, 1)
    df.new$condit.var <- df.new$logsdp
    xlab <- xlab("SDP (log)") 
    
  }  
  
  if(i == 3) {
    
    fd.dat <- fd2.sdp.pooled.troops
    lab <- ggtitle("Model A2 (All resolutions)")
    df.new <- df
    ylim <- ylim(-1, 1)
    df.new$condit.var <- df.new$logsdp
    xlab <- xlab("SDP (log)") 
    
  }
  
  if(i == 4) {
    
    fd.dat <- fd4.sdp.pooled.troops
    lab <- ggtitle("Model A4 (Important resolutions)")
    df.new <- subset(df, year >= 1983)
    ylim <- ylim(-1, 1)
    df.new$condit.var <- df.new$logsdp
    xlab <- xlab("SDP (log)") 
    
  }  
  
  if(i == 5) {
    
    fd.dat <- fd2.nuc2.pooled.pact
    lab <- ggtitle("Model A11 (All resolutions)")
    df.new <- df
    ylim <- ylim(-0.8, 1.7)
    df.new$condit.var <- df.new$Nu_infrastructure
    xlab <- xlab("Nuclear Proficiency") 
    
  }
  
  if(i == 6) {
    
    fd.dat <- fd4.nuc2.pooled.pact
    lab <- ggtitle("Model A12 (Important resolutions)")
    df.new <- subset(df, year >= 1983)
    ylim <- ylim(-0.8, 1.7)
    df.new$condit.var <- df.new$Nu_infrastructure
    xlab <- xlab("Nuclear Proficiency") 
    
  } 
  
  fd.dat %>% 
    ggplot() +
    theme_bw() +
    geom_ribbon(aes(x=condit.var, ymax = estimate + 1.96*std.error, ymin = estimate - 1.96*std.error), alpha = 0.4) +
    geom_ribbon(aes(x=condit.var, ymax = estimate + 1.645*std.error, ymin = estimate - 1.645*std.error), alpha = 0.6) +
    geom_line(aes(x = condit.var, y = estimate), size = 2) +
    geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
    geom_vline(xintercept = pctile(df.new$condit.var, na.rm = T, probs = 0.5), size = 0.5, linetype = "dashed") +
    geom_rug(aes(condit.var), df.new) +
    ylim +
    theme(text = element_text(size = 25),
          plot.title = element_text(size = 25),
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          axis.text.x = element_text(size = 15, color = "black"),
          axis.text.y = element_text(size = 15, color = "black"),
          plot.margin=unit(c(3,3,3,3), 'mm')) +
    xlab + ylab("Difference in Fitted Values") + lab -> fdplot.sdp
  
  # save figures
  if(i == 1) fdplot.sdp ->> fdplot.sdp.m2pact
  if(i == 2) fdplot.sdp ->> fdplot.sdp.m4pact
  if(i == 3) fdplot.sdp ->> fdplot.sdp.m2troops
  if(i == 4) fdplot.sdp ->> fdplot.sdp.m4troops
  if(i == 5) fdplot.sdp ->> fdplot.nuc2.m5pact
  if(i == 6) fdplot.sdp ->> fdplot.nuc2.m6pact
  
}

for(i in 1:2) {
  
  if(i == 1) {
    fd.dat <- fd2.nuc1.pooled.pact
    lab <- ggtitle("Model A5 (All resolutions)")
  }
  
  if(i == 2) {
    fd.dat <- fd4.nuc1.pooled.pact
    lab <- ggtitle("Model A6 (Important resolutions)")
  }  
  
  fd.dat %>% 
    ggplot() +
    theme_bw() +
    geom_pointrange(aes(x = condit.var, y = estimate,
                        ymax = estimate + 1.96*std.error, 
                        ymin = estimate - 1.96*std.error),
                    size = 2, linewidth = 1) +
    geom_linerange(aes(x = condit.var, y = estimate, 
                       ymax = estimate + 1.645*std.error, 
                       ymin = estimate - 1.645*std.error), 
                   size = 5, linewidth = 2.5) +
    geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
    ylim(0, 1.5) +
    theme(text = element_text(size = 25),
          plot.title = element_text(size = 25),
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          axis.text.x = element_text(size = 15, color = "black"),
          axis.text.y = element_text(size = 15, color = "black"),
          plot.margin=unit(c(3,3,3,3), 'mm')) +
    xlab("Nuclear Latency \n Fuhrmann & Tkach (2015)") + ylab("Difference in Fitted Values") + lab -> fdplot.sdp
  
  # save figures
  if(i == 1) fdplot.sdp ->> fdplot.nuc1.m5pact
  if(i == 2) fdplot.sdp ->> fdplot.nuc1.m6pact
}

# export tables
#=======================================================================================#
# table
coef_map1 <- c(unga = "Voting sim.",
               ungaImpt = "Voting sim.",
               logsdp = "SDP",
               possession = "Nuclear Weapon Possession",
               condit.var = "SDP/Nuc Latency",
               condit.var1 = "SDP/Nuc Latency",
               `unga:condit.var` = "Voting sim. x SDP/Nuc Latency",
               `ungaImpt:condit.var` = "Voting sim. x SDP/Nuc Latency",
               `unga:condit.var1` = "Voting sim. x SDP/Nuc Latency",
               `ungaImpt:condit.var1` = "Voting sim. x SDP/Nuc Latency",
               `unga:possession` = "Voting sim. x Possession",
               `ungaImpt:possession` = "Voting sim. x  Possession",
               `signal:logsdp` = "Reassurance. x SDP",
               USlogtrade = "Trade w. US",
               RUSlogtrade = "Trade w. Russia",
               CHNlogtrade = "Trade w. China",
               USdist = "Dist. from US",
               RUSdist = "Dist. frm Russia",
               CHNdist = "Dist. frm China",
               threats = "External threats",
               polity2 = "Democracy",
               Sscore = "Foreign policy sim.",
               mid5 = "MID",
               logpop = "Population",
               signal = "Reassurance (LDV)")

title_main <- "Tab. 1: OLS Regressions on U.S. Reassurance"
title_notroops <- "Tab. A1: Robustness Checks Excluding Troop Deployments"
title_nuc1 <- "Tab. A2: Robustness Checks Using Nuclear Latency"
title_nuc2 <- "Tab. A3: Robustness Checks Using Nuclear Proficiency"

modelsummary(list("Model 1" = pooled_nopact_fit1, "Model 2" = pooled_nopact_fit2,
                  "Model 3" = pooled_nopact_fit3, "Model 4" = pooled_nopact_fit4),
             align = "ldddd",
             #output = "latex",
             stars = c('+' = .1, '*' = .05, '**' = .01), 
             coef_map = coef_map1,
             gof_map = c("nobs", "adj.r.squared"),
             title = title_main) 

modelsummary(list("Model A1" = pooled_notroops_fit1, "Model A2" = pooled_notroops_fit2,
                  "Model A3" = pooled_notroops_fit3, "Model A4" = pooled_notroops_fit4),
             align = "ldddd",
             #output = "latex",
             stars = c('+' = .1, '*' = .05, '**' = .01), 
             coef_map = coef_map1,
             gof_map = c("nobs", "adj.r.squared"),
             title = title_notroops) 

modelsummary(list("Model A5" = pooled_nopact_fit5, "Model A6" = pooled_nopact_fit6, 
                  "Model A7" = pooled_nopact_fit7, "Model A8" = pooled_nopact_fit8,
                  "Model A9" = pooled_nopact_fit9, "Model A10" = pooled_nopact_fit10),
             align = "ldddddd",
             # output = "latex",
             stars = c('+' = .1, '*' = .05, '**' = .01), 
             coef_map = coef_map1, gof_map = c("nobs", "adj.r.squared"),
             title = title_nuc1)

modelsummary(list("Model A11" = pooled_nopact_fit11, "Model A12" = pooled_nopact_fit12),
             align = "ldd",
             # output = "latex",
             stars = c('+' = .1, '*' = .05, '**' = .01), 
             coef_map = coef_map1, gof_map = c("nobs", "adj.r.squared"), 
             title = title_nuc2) 

# export figures
#=======================================================================================#
grid.arrange(fdplot.sdp.m2pact, fdplot.sdp.m4pact, ncol = 2) -> fd.plot.combine.sdp.pact
grid.arrange(fdplot.sdp.m2troops, fdplot.sdp.m4troops, ncol = 2) -> fd.plot.combine.sdp.troops
grid.arrange(fdplot.nuc1.m5pact, fdplot.nuc1.m6pact, ncol = 2) -> fd.plot.combine.nuc1
grid.arrange(fdplot.nuc2.m5pact, fdplot.nuc2.m6pact, ncol = 2) -> fd.plot.combine.nuc2

ggsave("mainsdp.pdf", fd.plot.combine.sdp.pact, width = 13, height = 6)
ggsave("mainsdp2.jpg", fd.plot.combine.sdp.pact, width = 13, height = 6, dpi = 2000)

ggsave("mainsdp_notroops.pdf", fd.plot.combine.sdp.troops, width = 13, height = 6)
ggsave("mainnuc1.pdf", fd.plot.combine.nuc1, width = 13, height = 6)
ggsave("mainnuc2.pdf", fd.plot.combine.nuc2, width = 13, height = 6)

# Additional analysis 2: examining effects of reassurance on voting similarity
#=======================================================================================#
#=======================================================================================#

# estimating models
#=======================================================================================#

new.dat <- new.dat.pact

# ten draws
for(i in 1:10) {
  
  # reassurance at t-1
  Lag1.mean <- df$Lag1signal_nopact_mean
  Lag1.sd <- df$Lag1signal_nopact_sd
  
  new.dat[[i]]$Lag1.signal <- rnorm(rep(1, nrow(df)), Lag1.mean, Lag1.sd)
  
  # reassurance at t-2
  Lag2.mean <- df$Lag2signal_nopact_mean
  Lag2.sd <- df$Lag2signal_nopact_sd
  
  new.dat[[i]]$Lag2.signal <- rnorm(rep(1, nrow(df)), Lag2.mean, Lag2.sd)
  
  # reassurance ave
  
  new.dat[[i]]$Lag.mva.signal <- (new.dat[[i]]$signal + new.dat[[i]]$Lag1.signal + new.dat[[i]]$Lag2.signal)/3
  
} 

for(h in 1:2) {
  
  if(h == 2) {
    
    for(i in 1:10) {
      
      new.dat[[i]]$signal <- new.dat[[i]]$Lag.mva.signal 
      
    }
  }
  
  # four models
  for(i in 1:4) {
    
    # models 1 and 2 (all UNGA Res.)
    if(i %in% c(1, 2)) {
      
      for(j in 1:10) {
        
        new.dat[[j]]$L1unga <- df$L1unga
        new.dat[[j]]$unga <- df$unga
        new.dat[[j]]$time <- df$timeC
        
      }
    }
    
    # models 3 and 4 (important UNGA Res.)
    if(i %in% c(3, 4)) {
      
      for(j in 1:10) {
        
        new.dat[[j]]$L1unga <- df$L1ungaImpt
        new.dat[[j]]$unga <- df$ungaImpt
        new.dat[[j]]$time <- df$time2C
        
      }
    }
    
    control <- 
      c("USlogtrade", "RUSlogtrade", "CHNlogtrade", "USdist", "RUSdist", "CHNdist", "threats", "logpop",
        "Sscore", "polity2", "mid5", "as.factor(region)", "time", "I(time^2)", "I(time^3)")
    
    if(i %in% c(1, 3)) {
      
      formula <- update(L1unga ~ unga + signal + logsdp, reformulate(c(".", control)))
      formula <- Reduce(paste, deparse(formula))
      
    }
    
    if(i %in% c(2, 4)) {
      
      formula <- update(L1unga ~ unga + signal*logsdp, reformulate(c(".", control)))
      formula <- Reduce(paste, deparse(formula))
      
    }
    
    fit_reg <- function(dat) {
      
      formula <- as.formula(formula)
      mod <- lm_robust(formula, data = dat, clusters = ccode, se_type = "stata")
      
    }
    
    models <- lapply(new.dat, function(x) fit_reg(dat = x))
    
    summary(pooled_fit <- pool(models))
    
    if(h == 1 & i == 1) pooled_fit ->> pooled_nopact_revfit1
    if(h == 1 & i == 2) pooled_fit ->> pooled_nopact_revfit2
    if(h == 1 & i == 3) pooled_fit ->> pooled_nopact_revfit3
    if(h == 1 & i == 4) pooled_fit ->> pooled_nopact_revfit4
    
    if(h == 2 & i == 1) pooled_fit ->> pooled_nopact_revfit1.mva
    if(h == 2 & i == 2) pooled_fit ->> pooled_nopact_revfit2.mva
    if(h == 2 & i == 3) pooled_fit ->> pooled_nopact_revfit3.mva
    if(h == 2 & i == 4) pooled_fit ->> pooled_nopact_revfit4.mva
    
    if(i %in% c(2, 4)){
      
      int.sdp_reg.rev <- function(dat, sdp.val) {
        
        formula <- as.formula(formula)
        mod <- lm_robust(formula, data = dat, clusters = ccode, se_type = "stata")
        comp <- comparisons(mod, variables = list(signal = "2sd"), newdata = datagrid(logsdp = sdp.val))
        
      }
      
      if(i == 2)list.sdp <-  quantile(df$logsdp, prob = seq(0, 1, length = 15), na.rm = T)
      if(i == 4)list.sdp <-  quantile(subset(df, year >= 1983)$logsdp, prob = seq(0, 1, length = 15), na.rm = T)
      
      for(j in 1:length(list.sdp)){
        
        if(j == 1){fd.int.sdp.pooled.rev <- tibble()}
        
        sub <- lapply(new.dat, function(x) int.sdp_reg.rev(dat = x, sdp.val = list.sdp[j]))
        sub.pooled <- summary(pool(sub))
        fd.int.sdp.pooled.rev <- rbind(fd.int.sdp.pooled.rev, sub.pooled)
        
        cat("+")
        
      }
      
      fd.int.sdp.pooled.rev$sdp <- list.sdp
      
      if(h == 1 & i == 2) fd.int.sdp.pooled.rev ->> fd2.sdp.pooled.pact.rev
      if(h == 1 & i == 4) fd.int.sdp.pooled.rev ->> fd4.sdp.pooled.pact.rev
      
      if(h == 2 & i == 2) fd.int.sdp.pooled.rev ->> fd2.sdp.pooled.pact.rev.mva
      if(h == 2 & i == 4) fd.int.sdp.pooled.rev ->> fd4.sdp.pooled.pact.rev.mva
      
    }
  }
}

# figures
#=======================================================================================#

for(i in 1:4) {
  
  if(i == 1) {
    fd.dat <- fd2.sdp.pooled.pact.rev
    lab <- ggtitle("Model A14 (All resolutions)")
    df.new <- df
  }
  
  if(i == 2) {
    fd.dat <- fd4.sdp.pooled.pact.rev
    lab <- ggtitle("Model A16 (Important resolutions)")
    df.new <- subset(df, year >= 1983)
  }  
  
  if(i == 3) {
    fd.dat <- fd2.sdp.pooled.pact.rev.mva
    lab <- ggtitle("Model A18 (All resolutions)")
    df.new <- df
  }
  
  if(i == 4) {
    fd.dat <- fd4.sdp.pooled.pact.rev.mva
    lab <- ggtitle("Model A20 (Important resolutions)")
    df.new <- subset(df, year >= 1983)
  }  
  
  fd.dat %>% 
    ggplot() +
    theme_bw() +
    geom_ribbon(aes(x=sdp, ymax = estimate + 1.96*std.error, ymin = estimate - 1.96*std.error), alpha = 0.4) +
    geom_ribbon(aes(x=sdp, ymax = estimate + 1.645*std.error, ymin = estimate - 1.645*std.error), alpha = 0.6) +
    geom_line(aes(x=sdp, y=estimate), size = 2) +
    geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
    geom_vline(xintercept = pctile(df.new$logsdp, na.rm = T, probs = 0.5), size = 0.5, linetype = "dashed") +
    geom_rug(aes(logsdp), df.new) +
    theme(text = element_text(size = 25),
          plot.title = element_text(size = 25),
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          axis.text.x = element_text(size = 15, color = "black"),
          axis.text.y = element_text(size = 15, color = "black"),
          plot.margin=unit(c(3,3,3,3), 'mm')) +
    xlab("SDP (log)") + ylab("Difference in Fitted Values") + lab -> fdplot.sdp
  
  if(i == 1) fdplot.sdp ->> fdplot.sdp.m2pact.rev
  if(i == 2) fdplot.sdp ->> fdplot.sdp.m4pact.rev
  if(i == 3) fdplot.sdp ->> fdplot.sdp.m2pact.rev.mva
  if(i == 4) fdplot.sdp ->> fdplot.sdp.m4pact.rev.mva
  
}

# export tables
#=======================================================================================#

# table
coef_map2 <- c(signal = "Reassurance",
               logsdp = "SDP",
               `signal:logsdp` = "Reassurance. x SDP",
               USlogtrade = "Trade w. US",
               RUSlogtrade = "Trade w. Russia",
               CHNlogtrade = "Trade w. China",
               USdist = "Dist. from US",
               RUSdist = "Dist. frm Russia",
               CHNdist = "Dist. frm China",
               threats = "External Threats",
               polity2 = "Democracy",
               Sscore = "Foreign policy sim.",
               mid5 = "MID",
               logpop = "Population",
               unga = "Voting sim. (LDV)")

title_rev1 <- "Tab. A4: Regression of Voting Similarity"
title_rev2 <- "Tab. A5: Regression of Voting Similarity"

modelsummary(list("Model A13" = pooled_nopact_revfit1, "Model A14" = pooled_nopact_revfit2,
                  "Model A15" = pooled_nopact_revfit3, "Model A16" = pooled_nopact_revfit4),
             align = "ldddd",
             #output = "latex",
             stars = c('+' = .1, '*' = .05, '**' = .01), 
             coef_map = coef_map2,
             gof_map = c("nobs", "adj.r.squared"),
             title = title_rev1) 

modelsummary(list("Model A17" = pooled_nopact_revfit1.mva, "Model A18" = pooled_nopact_revfit2.mva,
                  "Model A19" = pooled_nopact_revfit3.mva, "Model A20" = pooled_nopact_revfit4.mva),
             align = "ldddd",
             #output = "latex",
             stars = c('+' = .1, '*' = .05, '**' = .01), 
             coef_map = coef_map2,
             gof_map = c("nobs", "adj.r.squared"),
             title = title_rev2) 

# export figures
#=======================================================================================#
grid.arrange(fdplot.sdp.m2pact.rev, fdplot.sdp.m4pact.rev, ncol = 2) -> fd.plot.combine.sdp.pact.rev
grid.arrange(fdplot.sdp.m2pact.rev.mva, fdplot.sdp.m4pact.rev.mva, ncol = 2) -> fd.plot.combine.sdp.pact.rev.mva
ggsave("mainsdp_rev.pdf", fd.plot.combine.sdp.pact.rev, width = 13, height = 6)
ggsave("mainsdp_rev_mva.pdf", fd.plot.combine.sdp.pact.rev.mva, width = 13, height = 6)

# additional analysis 4: examining nonlinear interaction effects
#=======================================================================================#
#=======================================================================================#

# estimating models
#=======================================================================================#
new.dat <- new.dat.pact

for(i in 1:2) {
  
  # model 2 (all UNGA Res.)
  if(i == 1) {
    
    for(j in 1:10) {
      
      new.dat[[j]]$unga <- df$unga
      new.dat[[j]]$time1 <- df$timeC
      new.dat[[j]]$time2 <- df$timeC^2
      new.dat[[j]]$time3 <- df$timeC^3
      
    }
  }
  
  # model 4 (only important UNGA Res.)
  if(i == 2) {
    
    for(j in 1:10) {
      
      new.dat[[j]]$unga <- df$ungaImpt
      new.dat[[j]]$time1 <- df$time2C
      new.dat[[j]]$time2 <- df$time2C^2
      new.dat[[j]]$time3 <- df$time2C^3
      
    }
  }
  
  y <- "L.signal"
  x <- "logsdp"
  d <- "unga"
  z <- c("USlogtrade", "RUSlogtrade", "CHNlogtrade", "USdist", "RUSdist",
         "CHNdist", "threats", "logpop", "Sscore", "polity2", "mid5", "region1", "region2",
         "region4", "time1", "time2", "time3")
  
  intf_reg <- function(dat) {
    
    intf <- interflex(Y = y, D = d, X = x, Z = z, data = as.data.frame(dat), vcov.type = "cluster",
                      cl = "ccode", estimator = "binning", na.rm = T, nbins = 3)
    
  }
  
  models <- lapply(new.dat, function(x) intf_reg(dat = x))
  
  bin.x <- c(as.data.frame(models[[1]]$est.bin)[1,1], 
             as.data.frame(models[[1]]$est.bin)[2,1],
             as.data.frame(models[[1]]$est.bin)[3,1])
  
  bin.coef1 <- c(NULL)
  bin.coef2 <- c(NULL)
  bin.coef3 <- c(NULL)
  bin.se1 <- c(NULL)
  bin.se2 <- c(NULL)
  bin.se3 <- c(NULL)
  
  for(j in 1:10) {
    
    bin.coef1[j] <- as.data.frame(models[[j]]$est.bin)[1,2]
    bin.coef2[j] <- as.data.frame(models[[j]]$est.bin)[2,2]
    bin.coef3[j] <- as.data.frame(models[[j]]$est.bin)[3,2]
    bin.se1[j] <- as.data.frame(models[[j]]$est.bin)[1,3]
    bin.se2[j] <- as.data.frame(models[[j]]$est.bin)[2,3]
    bin.se3[j] <- as.data.frame(models[[j]]$est.bin)[3,3]
    
  }
  
  par.est <- c(mean(bin.coef1), mean(bin.coef2), mean(bin.coef3))
  se.between <- c(var(bin.coef1), var(bin.coef2), var(bin.coef3))
  se.within <- c(mean(bin.se1^2), mean(bin.se2^2), mean(bin.se3^2))
  
  se.est <- sqrt(se.within + se.between*(1 + (1/10)))
  
  ciu95 <- par.est + 1.96*se.est
  cil95 <- par.est - 1.96*se.est
  ciu90 <- par.est + 1.645*se.est
  cil90 <- par.est - 1.645*se.est
  
  intf.dat <- data.frame(x = bin.x, y = par.est, ymax95 = ciu95, ymin95 = cil95, ymax90 = ciu90, ymin90 = cil90)
  
  if(i == 1) {
    
    lab <- ggtitle("Model 2 (All resolutions)")
    xlab <- xlab("SDP (log)")
    rug <- geom_rug(aes(logsdp), df)
    vline <- pctile(df$logsdp, na.rm = T, probs = 0.5)
    
  }
  
  if(i == 2) {
    
    lab <- ggtitle("Model 4 (Important resolutions)")
    xlab <- xlab("SDP (log)")
    rug <- geom_rug(aes(logsdp), subset(df, year >= 1983))
    vline <- pctile(subset(df, year >= 1983)$logsdp, na.rm = T, probs = 0.5)
    
  }
  
  ggplot(NULL) +
    theme_bw() +
    geom_linerange(aes(x = x, y = y, ymax = ymax90, ymin = ymin90), 
                   data = intf.dat, alpha = 0.5, linewidth = 10) +
    geom_linerange(aes(x = x, y = y, ymax = ymax95, ymin = ymin95),
                   data = intf.dat, alpha = 0.4, linewidth = 10) +
    geom_point(aes(x = x, y = y), data = intf.dat, size = 6, color = "white") +
    geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
    geom_vline(xintercept = vline, size = 0.5, linetype = "dashed") +
    rug +
    theme(text = element_text(size = 25),
          plot.title = element_text(size = 25),
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          axis.text.x = element_text(size = 15, color = "black"),
          axis.text.y = element_text(size = 15, color = "black"),
          plot.margin=unit(c(3,3,3,3), 'mm')) +
    xlab + ylab("Marginal Effects") + lab  -> binplot 
  
  if(i == 1) binplot ->> binplot.m2sdp
  if(i == 2) binplot ->> binplot.m4sdp
  
}

# export figures
#=======================================================================================#
grid.arrange(binplot.m2sdp, binplot.m4sdp, ncol = 2) -> binplot.sdp
ggsave("binplot.sdp.pdf", binplot.sdp, width = 13, height = 6)  

# histograms in Online Appendix
#=======================================================================================#
#=======================================================================================#
for(i in 1:4) {
  
  if(i == 1) {
    new.dat.pact[[1]]$var.hist <- new.dat.pact[[1]]$signal
    xlab <- xlab("Reassuerance")
  }
  
  if(i == 2) {
    new.dat.pact[[1]]$var.hist <- new.dat.pact[[1]]$unga
    xlab <- xlab("Voting Similarity \n (All resolutions)")
  }
  
  if(i == 3) {
    new.dat.pact[[1]]$var.hist <- new.dat.pact[[1]]$ungaImpt
    xlab <- xlab("Voting Similarity \n (Important resolutions)")
  }
  
  if(i == 4) {
    new.dat.pact[[1]]$var.hist <- new.dat.pact[[1]]$logsdp
    xlab <- xlab("SDP (logged)")
  }
  
  ggplot() +
    theme_classic() +
    geom_histogram(aes(var.hist), new.dat.pact[[1]], color="black", fill="gray", bins=20) +
    theme(text = element_text(size = 18),
          strip.text = element_text(size = 18),
          strip.background = element_rect(colour = 'black', fill = 'white', linetype='solid'),
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
          axis.text.x = element_text(size = 18, color = "black"),
          axis.text.y = element_text(size = 15, color = "black"),
          plot.margin=unit(c(3,3,3,3), 'mm')) +
    xlab + ylab("Frequency") -> hist.var
  
  if(i == 1) hist.var ->> hist.signal
  if(i == 2) hist.var ->> hist.unga
  if(i == 3) hist.var ->> hist.ungaImpt
  if(i == 4) hist.var ->> hist.sdp
  
}

grid.arrange(hist.signal, hist.unga, hist.ungaImpt, hist.sdp, ncol = 2) -> hist
ggsave("hist.pdf", hist, width = 16, height = 10)  


# information on footnote 22
#=======================================================================================#
#=======================================================================================#
res.signal.all <- lm(signal ~ time + I(time^2) + I(time^3), data = new.dat.pact[[1]])$residual
res.signal.c <- lm(signal ~ as.factor(ccode) + time + I(time^2) + I(time^3), data = new.dat.pact[[1]])$residual

res.unga.all <- lm(unga ~ time + I(time^2) + I(time^3), data = new.dat.pact[[1]])$residual
res.unga.c <- lm(unga ~ as.factor(ccode) + time + I(time^2) + I(time^3), data = new.dat.pact[[1]])$residual

res.sdp.all <- lm(logsdp ~ time + I(time^2) + I(time^3), data = new.dat.pact[[1]])$residual
res.sdp.c <- lm(logsdp ~ as.factor(ccode) + time + I(time^2) + I(time^3), data = new.dat.pact[[1]])$residual

sd(res.signal.c)/sd(res.signal.all)
sd(res.unga.c)/sd(res.unga.all)
sd(res.sdp.c)/sd(res.sdp.all)
